Population GDP Avg Temperature
library(tidyverse)
[30m── [1mAttaching packages[22m ───────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──[39m
[30m[32m✔[30m [34mggplot2[30m 3.2.1 [32m✔[30m [34mpurrr [30m 0.3.3
[32m✔[30m [34mtibble [30m 2.1.3 [32m✔[30m [34mdplyr [30m 0.8.3
[32m✔[30m [34mtidyr [30m 1.0.2 [32m✔[30m [34mstringr[30m 1.4.0
[32m✔[30m [34mreadr [30m 1.3.1 [32m✔[30m [34mforcats[30m 0.4.0[39m
[30m── [1mConflicts[22m ──────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31m✖[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()[39m
library(anchors)
Loading required package: rgenoud
## rgenoud (Version 5.8-3.0, Build Date: 2019-01-22)
## See http://sekhon.berkeley.edu/rgenoud for additional documentation.
## Please cite software as:
## Walter Mebane, Jr. and Jasjeet S. Sekhon. 2011.
## ``Genetic Optimization Using Derivatives: The rgenoud package for R.''
## Journal of Statistical Software, 42(11): 1-26.
##
Loading required package: MASS
Attaching package: ‘MASS’
The following object is masked from ‘package:dplyr’:
select
## anchors (Version 3.0-8, Build Date: 2014-02-24)
## See http://wand.stanford.edu/anchors for additional documentation and support.
library(moderndive)
The code in the chunk below sums the cases for countries that are reported by region, so that each of those countries has a single row per date. We had to do this for the US, China, Canada, and Australia.
# Load the raw COVID-19 case file; column types are guessed by read_csv().
virus <- read_csv(file = "covid_19_clean_complete.csv")
Parsed with column specification:
cols(
Province_State = [31mcol_character()[39m,
Country_Region = [31mcol_character()[39m,
Lat = [32mcol_double()[39m,
Long = [32mcol_double()[39m,
Date = [31mcol_character()[39m,
Confirmed = [32mcol_double()[39m,
Deaths = [32mcol_double()[39m,
Recovered = [32mcol_double()[39m
)
# Relabel "US" as "United States" in Country_Region so the country name
# matches the "United States" filters used later in this file. Written with
# dplyr (already attached) instead of anchors::replace.value() so this step
# does not depend on the anchors package.
virus <- virus %>%
  mutate(
    Country_Region = if_else(
      Country_Region == "US", "United States", Country_Region
    )
  )
#US_virus keeps only the US rows whose Province_State contains no comma (dropping the "City/County, State" entries) and sums the remaining rows into one US total per date
#No_US_virus is the data for all the other countries, excluding the US, China, Australia, and Canada, which are aggregated separately
#country_virus is No_US_virus combined with the aggregated US rows
#US_virus = virus %>% filter(Country_Region == "US") %>% filter(Province_State != "Grand Princess Cruise Ship",Province_State != "Omaha, NE (From Diamond Princess)",Province_State != "Travis, CA (From Diamond Princess)",Province_State != "Lackland, TX (From Diamond Princess)",Province_State != "Unassigned Location (From Diamond Princess)") %>% group_by(Date) %>% summarize(Confirmed = sum(Confirmed),Deaths = sum(Deaths), Recovered = sum(Recovered)) %>% mutate("Province_State" = "US", "Country_Region" = "US") #OLD DATSET CODE; new dataset broken into cities and countries
#Code for Second Version of Dataset until 3/25
#US_virus = virus %>% filter(Country_Region == "United States")%>% group_by(Date) %>% separate(col = Province_State, into = c("City_or_County","State"),sep = ",") %>% filter(is.na(State)== T)%>% summarize(Confirmed = sum(Confirmed),Deaths = sum(Deaths), Recovered = sum(Recovered)) %>% mutate("Province_State" = "United States", "Country_Region" = "United States")
# Collapse the United States rows into one national total per date.
# separate() splits Province_State at the comma; values without a comma get
# State = NA, and only those rows are kept before summing. fill = "right"
# makes that NA padding explicit instead of relying on separate()'s
# warn-and-fill default. Recovered is not carried through, so it is NA for
# these rows after the join below.
US_virus <- virus %>%
  filter(Country_Region == "United States") %>%
  group_by(Date) %>%
  separate(
    col = Province_State,
    into = c("City_or_County", "State"),
    sep = ",",
    fill = "right"
  ) %>%
  filter(is.na(State)) %>%
  summarize(Confirmed = sum(Confirmed), Deaths = sum(Deaths)) %>%
  mutate(Province_State = "United States", Country_Region = "United States")

# Every row for countries that are NOT aggregated in this file.
No_US_virus <- virus %>%
  filter(
    Country_Region != "United States",
    Country_Region != "China",
    Country_Region != "Australia",
    Country_Region != "Canada"
  )

# Natural join on the shared columns; the aggregated US rows match nothing in
# No_US_virus, so they are appended after the other countries.
country_virus <- full_join(No_US_virus, US_virus)
Joining, by = c("Province_State", "Country_Region", "Date", "Confirmed", "Deaths")
# Show the aggregated United States rows.
country_virus %>%
  filter(Country_Region == "United States")

# For China: one national total per date.
China_virus <- virus %>%
  filter(Country_Region == "China") %>%
  group_by(Date) %>%
  summarize(Confirmed = sum(Confirmed), Deaths = sum(Deaths)) %>%
  mutate(Province_State = "China", Country_Region = "China")

# Rows for every country other than the four that are aggregated.
No_China_virus <- virus %>%
  filter(
    Country_Region != "China" &
      Country_Region != "United States" &
      Country_Region != "Australia" &
      Country_Region != "Canada"
  )

# Natural join on the shared columns.
country_virus1 <- No_China_virus %>%
  full_join(China_virus)
Joining, by = c("Province_State", "Country_Region", "Date", "Confirmed", "Deaths")
# For Canada: one national total per date.
Canada_virus <- virus %>%
  filter(Country_Region == "Canada") %>%
  group_by(Date) %>%
  summarize(Confirmed = sum(Confirmed), Deaths = sum(Deaths)) %>%
  mutate(Province_State = "Canada", Country_Region = "Canada")

# Rows for every country other than the four that are aggregated.
No_Canada_virus <- virus %>%
  filter(
    Country_Region != "China" &
      Country_Region != "United States" &
      Country_Region != "Australia" &
      Country_Region != "Canada"
  )

# Natural join on the shared columns.
country_virus11 <- No_Canada_virus %>%
  full_join(Canada_virus)
Joining, by = c("Province_State", "Country_Region", "Date", "Confirmed", "Deaths")
# For Australia: one national total per date.
Australia_virus <- virus %>%
  filter(Country_Region == "Australia") %>%
  group_by(Date) %>%
  summarize(Confirmed = sum(Confirmed), Deaths = sum(Deaths)) %>%
  mutate(Province_State = "Australia", Country_Region = "Australia")

# Rows for every country other than the four that are aggregated.
No_Australia_virus <- virus %>%
  filter(
    Country_Region != "Australia" &
      Country_Region != "United States" &
      Country_Region != "China" &
      Country_Region != "Canada"
  )

# Natural join on the shared columns.
country_virus2 <- No_Australia_virus %>%
  full_join(Australia_virus)
Joining, by = c("Province_State", "Country_Region", "Date", "Confirmed", "Deaths")
# Merge the US and China tables; the join is on every shared column, so the
# rows the two tables have in common appear once.
predata <- country_virus %>%
  full_join(country_virus1)
Joining, by = c("Province_State", "Country_Region", "Lat", "Long", "Date", "Confirmed", "Deaths", "Recovered")
# Add the Australia table to the running result (natural join on all shared
# columns).
mydata <- predata %>%
  full_join(country_virus2)
Joining, by = c("Province_State", "Country_Region", "Lat", "Long", "Date", "Confirmed", "Deaths", "Recovered")
# Add the Canada table (natural join on all shared columns), then display the
# combined data.
mydata2 <- mydata %>%
  full_join(country_virus11)
#mydata2 %>% filter(Country_Region == "Canada")
mydata2
#GDPandWeather = GDPandWeather %>% rename("Country_Region"= Country)
# Display GDPandWeather (defined outside this section), then attach its
# columns to the case data by country name.
GDPandWeather
partialdata <- mydata2 %>%
  left_join(GDPandWeather, by = "Country_Region")

# Rename the population table's Country column so it can be joined on the
# same key, then attach it as well.
country_population_2 <- rename(country_population_1, Country_Region = Country)
ourfinaldata <- partialdata %>%
  left_join(country_population_2, by = "Country_Region")
Filter by the previous day to make sure we have the most recent data.
# Show the rows for 2020-04-06. At this point Date is still the raw text read
# from the CSV (it is only converted to a Date further down, using the
# "%m/%d/%y" format), so comparing it to the string "2020-04-06" matched no
# rows. Parse the column for the comparison instead; as.Date() leaves values
# that are already Dates unchanged, so this is also safe after conversion.
ourfinaldata %>%
  filter(as.Date(Date, format = "%m/%d/%y") == as.Date("2020-04-06"))

# Save the combined dataset to disk.
write_csv(ourfinaldata, "ourfinaldata.csv")
library(lubridate)
Attaching package: ‘lubridate’
The following object is masked from ‘package:base’:
date
# Convert Date from month/day/two-digit-year text into Date objects.
ourfinaldata <- ourfinaldata %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%y"))
# Worldwide totals per date, computed once and shared by both plots below.
world_totals <- ourfinaldata %>%
  group_by(Date) %>%
  summarize(Total = sum(Confirmed), Dead_count = sum(Deaths))

# One plot of world deaths: confirmed cases as a line whose thickness is
# mapped to the death count.
world_totals %>%
  ggplot() +
  geom_line(aes(x = Date, y = Total, group = 1, size = Dead_count)) +
  ggtitle("World Cases and Deaths Graph 1")

# Another plot of world deaths: confirmed cases (dashed) and deaths (red) as
# separate lines. Titled "Graph 2"; it previously reused the "Graph 1" title,
# which made the two figures indistinguishable.
world_totals %>%
  ggplot() +
  geom_line(aes(x = Date, y = Total, group = 1), linetype = "dashed") +
  geom_line(aes(x = Date, y = Dead_count, group = 1), color = "red") +
  ggtitle("World Cases and Deaths Graph 2")
# Draw confirmed cases (dashed line) and deaths (red line) over time for one
# country.
#
# data          : data frame with Country_Region, Province_State, Date,
#                 Confirmed and Deaths columns.
# country       : value of Country_Region to plot; also used in the title.
# national_only : if TRUE, keep only rows whose Province_State is NA.
#
# Returns the ggplot object; it is drawn when printed.
plot_cases_deaths <- function(data, country, national_only = FALSE) {
  # !! forces `country` to be read from the function argument even if `data`
  # happens to contain a column with that name.
  country_rows <- data %>%
    filter(Country_Region == !!country)
  if (national_only) {
    country_rows <- country_rows %>%
      filter(is.na(Province_State))
  }
  ggplot(country_rows) +
    geom_line(aes(x = Date, y = Confirmed, group = 1), linetype = "dashed") +
    geom_line(aes(x = Date, y = Deaths, group = 1), color = "red") +
    ggtitle(paste(country, "Cases and Deaths"))
}

# Countries to plot, in display order.
cases_deaths_countries <- c(
  "China", "United States", "South Korea", "Italy", "Spain", "Iran",
  "Canada", "Japan", "Costa Rica", "Germany", "United Kingdom", "France",
  "Russia", "Egypt", "South Africa", "Finland", "Austria", "Portugal",
  "Indonesia", "Australia", "Mexico", "Serbia", "Iceland", "Afghanistan",
  "Norway"
)

# For these two countries only the rows with no Province_State are plotted.
national_only_countries <- c("United Kingdom", "France")

# print() is required because ggplot objects are not auto-printed in a loop.
for (country in cases_deaths_countries) {
  print(
    plot_cases_deaths(
      ourfinaldata,
      country,
      national_only = country %in% national_only_countries
    )
  )
}
# Comparison of total cases and deaths across a fixed set of countries.
comparison_countries <- c(
  "United States", "Italy", "China", "Spain", "Iran", "Australia",
  "South Korea"
)

# Confirmed cases over time, one coloured line per country.
ourfinaldata %>%
  filter(Country_Region %in% comparison_countries) %>%
  ggplot(mapping = aes(x = Date, y = Confirmed, color = Country_Region)) +
  geom_line() +
  ggtitle("Total Confirmed Cases Over Time")

# Deaths over time, one coloured line per country.
ourfinaldata %>%
  filter(Country_Region %in% comparison_countries) %>%
  ggplot(mapping = aes(x = Date, y = Deaths, color = Country_Region)) +
  geom_line() +
  ggtitle("Total Confirmed Deaths Over Time")
Plots of the Percentage of Population Infected
# Note: Percent_Infected is a percentage, so its natural range is 0-100. The
# y-axis is limited to 0-1 to show how small the infected share is at this
# point.

# Plot the percentage of a country's population with a confirmed case over
# time.
#
# data        : data frame with Country_Region, Date, Confirmed and
#               Population_2020 columns.
# country     : value of Country_Region to plot.
# place_label : text appended to the title after "Infected in".
#
# Returns the ggplot object; it is drawn when printed.
plot_percent_infected <- function(data, country, place_label = country) {
  data %>%
    # !! forces `country` to be read from the function argument.
    filter(Country_Region == !!country) %>%
    group_by(Date) %>%
    # first() keeps Pop a single value per date; passing the bare column to
    # summarize() is not a one-value summary when a date has several rows.
    summarize(Total = sum(Confirmed), Pop = first(Population_2020)) %>%
    mutate(Percent_Infected = (Total / Pop) * 100) %>%
    ggplot() +
    geom_line(mapping = aes(x = Date, y = Percent_Infected)) +
    ylim(0, 1) +
    ggtitle(paste("Percentange of People Infected in", place_label))
}

#USA
plot_percent_infected(ourfinaldata, "United States", "the USA")
#China
plot_percent_infected(ourfinaldata, "China")
#Italy
plot_percent_infected(ourfinaldata, "Italy")
#Spain
plot_percent_infected(ourfinaldata, "Spain")
#India
plot_percent_infected(ourfinaldata, "India")
#Iran
plot_percent_infected(ourfinaldata, "Iran")
#South Africa
plot_percent_infected(ourfinaldata, "South Africa")
# Italy rows with more than 3 confirmed cases, numbered 1..n by date.
# arrange(Date) makes the day index follow the calendar regardless of row
# order, and row_number() also behaves when no rows pass the filter (1:n
# would give c(1, 0) for n = 0).
Italy_dat <- ourfinaldata %>%
  filter(Country_Region == "Italy", Confirmed > 3) %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_Italy <- nrow(Italy_dat)

#Using Nonlinear Least Squares Logistic Growth Model
# Confirmed = c / (1 + a * exp(b * day)); with b < 0 the curve levels off at c.
Italy_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 367.17, b = -0.195399, c = 120395.1478),
  data = Italy_dat
)
coef(summary(Italy_mod))
Estimate Std. Error t value Pr(>|t|)
a 2.563575e+02 2.045028e+01 12.53565 1.929110e-16
b -1.615392e-01 2.965849e-03 -54.46642 1.991634e-43
c 1.522465e+05 1.683454e+03 90.43701 1.840141e-53
# United States rows with more than 3 confirmed cases, numbered 1..n by date.
# The US rows are produced by a group_by(Date) %>% summarize() that runs while
# Date is still text, so they come out in string order ("2/1/20", "2/10/20",
# ..., "2/2/20"), not calendar order. Numbering them as-is scrambles the day
# index, so sort by the parsed Date first.
USA_dat <- ourfinaldata %>%
  filter(Country_Region == "United States", Confirmed > 3) %>%
  arrange(Date) %>%
  mutate(Num_Days_Since_Start = row_number())
num_days_USA <- nrow(USA_dat)

#Using Nonlinear Least Squares Logistic Growth Model
# Same model form as the Italy fit: Confirmed = c / (1 + a * exp(b * day)).
# NOTE(review): the unsorted day index is the likely cause of the earlier
# "singular gradient" failure; if nls() still does not converge on the sorted
# data, the start values below need revisiting.
USA_mod <- nls(
  Confirmed ~ c / (1 + a * exp(b * Num_Days_Since_Start)),
  start = list(a = 29066.21, b = -0.277, c = 332229.3),
  data = USA_dat
)
Error in nls(Confirmed ~ c/(1 + a * exp(b * Num_Days_Since_Start)), start = list(a = 29066.21, :
singular gradient
# Load the per-county United States case file; column types are guessed by
# read_csv().
my_counties <- read_csv(file = "usa_county_wise.csv")
Parsed with column specification:
cols(
UID = [32mcol_double()[39m,
iso2 = [31mcol_character()[39m,
iso3 = [31mcol_character()[39m,
code3 = [32mcol_double()[39m,
FIPS = [32mcol_double()[39m,
Admin2 = [31mcol_character()[39m,
Province_State = [31mcol_character()[39m,
Country_Region = [31mcol_character()[39m,
Lat = [32mcol_double()[39m,
Long_ = [32mcol_double()[39m,
Combined_Key = [31mcol_character()[39m,
Date = [31mcol_character()[39m,
Confirmed = [32mcol_double()[39m,
Deaths = [32mcol_double()[39m
)
# Convert Date from month/day/two-digit-year text into Date objects.
my_counties <- my_counties %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%y"))

# Pennsylvania county rows for 2020-03-30.
my_counties %>%
  filter(Province_State == "Pennsylvania", Date == as.Date("2020-03-30"))

# Confirmed cases per date for a handful of states, one coloured line each.
states_to_compare <- c(
  "Pennsylvania", "New York", "New Jersey", "Delaware", "California",
  "Michigan"
)
my_counties %>%
  filter(Province_State %in% states_to_compare) %>%
  group_by(Date, Province_State) %>%
  summarize(state_total = sum(Confirmed)) %>%
  ggplot(mapping = aes(x = Date, y = state_total, color = Province_State)) +
  geom_line() +
  ggtitle("Confirmed Cases across the States")

# The 20 states with the most confirmed cases on 2020-04-02, largest first.
my_counties %>%
  filter(Date == as.Date("2020-04-02")) %>%
  group_by(Province_State) %>%
  summarize(state_total = sum(Confirmed)) %>%
  top_n(n = 20, wt = state_total) %>%
  arrange(desc(state_total))